“Baby Mental Life: Study 1” was conducted on MTurk on 2018-07-31 through 2018-08-01.
Our planned sample was 300 participants, and we anticipated that roughly 85-90% of recruited participants would pass all of our attention checks, so we initially recruited 342 participants (on the idea that ~87.5% of 342 ~ 300 participants; note that for administrative purposes we needed to recruit participants in batches that were divisible by 9). After filtering out participants who failed at least one of our attention checks, we ended up retaining only 80% of participants, so we recruited an additional 36 participants for a total of 378 people recruited (on the idea that ~80% of 378 ~ 300 participants; note that for administrative purposes we needed to recruit participants in batches that were divisible by 9). In a slight deviation from our preregistered recruitment plan, we limited these additional 36 participants to women, because we had an unexpectedly large imbalance of men to women.
UPDATE: On 2018-08-09, KW discovered that there were many repeating GPS locations, a symptom of a recent outbreak of bots on MTurk. As of this date, these data have excluded any participants where there is another participant with an identical set of GPS coordinates as recorded by Qualtrics. This excluded an additional 76 participants (25%).
In the end, we ended up with a sample of 301 participants who passed our attention checks, 225 of whom came from unique GPS coordinates.
Each participant assessed children’s mental capacities at 3 target ages: newborns, 9-month-olds, and 5-year-olds. For each target, they rated 60 mental capacities on a scale from 0 (not at all capable) to 100 (completely capable).
For more details about the study, see our preregistration here.
# load required libraries
library(tidyverse)
[37m── [1mAttaching packages[22m ──────────────────────────────────────── tidyverse 1.2.1 ──[39m
[37m[32m✔[37m [34mggplot2[37m 3.0.0 [32m✔[37m [34mpurrr [37m 0.2.5
[32m✔[37m [34mtibble [37m 1.4.2 [32m✔[37m [34mdplyr [37m 0.7.6
[32m✔[37m [34mtidyr [37m 0.8.1 [32m✔[37m [34mstringr[37m 1.3.1
[32m✔[37m [34mreadr [37m 1.1.1 [32m✔[37m [34mforcats[37m 0.3.0[39m
[37m── [1mConflicts[22m ─────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[37m [34mdplyr[37m::[32mfilter()[37m masks [34mstats[37m::filter()
[31m✖[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()[39m
library(langcog) # source: https://github.com/langcog/langcog-package
Attaching package: ‘langcog’
The following object is masked from ‘package:base’:
scale
library(psych)
Attaching package: ‘psych’
The following objects are masked from ‘package:ggplot2’:
%+%, alpha
library(lme4)
Loading required package: Matrix
Attaching package: ‘Matrix’
The following object is masked from ‘package:tidyr’:
expand
# Use the black-and-white ggplot theme throughout this document
theme_set(theme_bw())

# Load project-specific helpers (EFA utilities, plotting, factor retention)
source("./scripts/max_factors_efa.R")
source("./scripts/plot_fun.R")
source("./scripts/reten_fun.R")

# Read the de-identified raw export and drop the leading row-index column (X)
d0 <- read.csv("./data/deidentified/baby_mental_life_s1_data.csv") %>%
  select(-X)
# make question key
# Build a key mapping Qualtrics column names to readable question labels.
# The first row of the raw Qualtrics export holds the full question text.
question_key <- d0[1,] %>%
t() %>%
data.frame() %>%
rownames_to_column("question_qualtrics") %>%
rename("question_text" = X1) %>%
# recode demographic items to descriptive names; unmatched names pass through
mutate(question = recode(question_qualtrics,
"Duration..in.seconds." = "Duration",
"Q2" = "Age",
"Q3" = "GenderSex",
"Q3_3_TEXT" = "GenderSex_fillIn",
"Q4" = "EnglishProf",
"Q5" = "FirstLang",
"Q5_2_TEXT" = "FirstLang_fillIn",
"Q18" = "RaceEthnicity",
"Q18_10_TEXT" = "RaceEthnicity_fillIn",
"Q19" = "Education",
"Q20" = "Income",
"Q21" = "MaritalStatus",
"Q21_6_TEXT" = "MaritalStatus_fillIn",
"Q22" = "HouseholdSize",
"Q23" = "Parent",
"Q25" = "ChildrenNumber",
"Q26" = "ChildrenYoungestAge",
"Q26_1_TEXT" = "ChildrenYoungestAge_fillIn1",
"Q26_2_TEXT" = "ChildrenYoungestAge_fillIn2",
"Q27" = "ChildrenOldestAge",
"Q27_1_TEXT" = "ChildrenOldestAge_fillIn1",
"Q27_2_TEXT" = "ChildrenOldestAge_fillIn2",
"Q28" = "Attention",
"Q29" = "Comments",
.default = question_qualtrics),
# mental-capacity items: strip the shared question stem, keep the capacity
# NOTE(review): "capable of..." is used as a regex, so the dots match ANY
# three characters — presumably meant as a literal ellipsis; confirm against
# the raw question text (ellipsis could also be the single character U+2026)
question = case_when(grepl("the following questions", question_text) ~
gsub("^.*extent is a ", "", question_text),
TRUE ~ question),
question = case_when(grepl("capable of...", question_text) ~
gsub("capable of... ", "", tolower(question)),
TRUE ~ question),
# normalize to snake_case and tag each item with its target age
question = gsub(" ", "_", question),
question = gsub("'", "", question),
question = gsub("newborn_-_", "target00mo_", question),
question = gsub("9-month-old_-_", "target09mo_", question),
question = gsub("5-year-old_-_", "target60mo_", question)) %>%
# final cleanup: remaining hyphens and one parenthetical example
mutate(question = gsub("-", "_", question),
question = gsub(" \\(for_example,_smooth,_rough\\)", "", question))
# rename questions
# Rename Qualtrics question columns to readable names via question_key.
d1 <- d0 %>%
  # drop the header/metadata rows: keep only rows where Q2 (Age) is numeric
  filter(!is.na(as.numeric(as.character(Q2)))) %>%
  gather(question_qualtrics, response, -c(ResponseId, duplicateGPS)) %>%
  # join key made explicit (was implicit, relying on a "Joining, by" guess)
  left_join(question_key %>% select(question_qualtrics, question),
            by = "question_qualtrics") %>%
  select(-question_qualtrics) %>%
  spread(question, response)
NAs introduced by coercionattributes are not identical across measure variables;
they will be droppedJoining, by = "question_qualtrics"
# implement inclusion/exclusion criteria
# Inclusion criteria: age 18-45, advanced/superior English, all three
# embedded attention checks answered correctly, and self-reported attention.
d2 <- d1 %>%
  # Age is still character at this point (numeric conversion happens later in
  # d4), so compare numerically rather than lexicographically (as characters,
  # "9" >= 18 evaluates TRUE because comparison is string-wise)
  filter(as.numeric(as.character(Age)) >= 18,
         as.numeric(as.character(Age)) <= 45,
         EnglishProf %in% c("Advanced", "Superior"),
         `target00mo_please_select_34` == 34,
         `target09mo_please_select_90` == 90,
         `target60mo_please_select_4` == 4,
         Attention == "Yes")
# remove people with another identical set of GPS coordinates among people who passed attention checks
# Exclude participants whose GPS coordinates exactly match another
# participant's (bot symptom; see 2018-08-09 note in the text above).
d3 <- d2 %>%
  # FALSE instead of F: F is an ordinary binding and can be reassigned
  filter(duplicateGPS == FALSE) %>%
  select(-duplicateGPS)
# recode variables & drop extraneous variables
# Drop Qualtrics bookkeeping columns, convert rating/numeric columns from
# character, and impose ordered factor levels for Education/Income/Parent.
d4 <- d3 %>%
select(-c(EndDate, Finished,
payment, Progress,
RecordedDate, StartDate, Status,
timeEstimate, UserLanguage)) %>%
# all capacity ratings plus numeric demographics come in as character
mutate_at(vars(c(starts_with("target"), Age, ChildrenNumber,
ChildrenOldestAge_fillIn1, ChildrenOldestAge_fillIn2,
ChildrenYoungestAge_fillIn1, ChildrenYoungestAge_fillIn2,
Duration, HouseholdSize)),
funs(as.numeric(.))) %>%
# explicit level order: least to most education / income
mutate(Education = factor(Education,
levels = c("No schooling completed",
"Nursery school to 8th grade",
"Some high school, no diploma",
"High school graduate, diploma or equivalent (including GED)",
"Some college credit, no degree",
"Trade school, technical school, or vocational school",
"Associate's degree (for example, AA, AS)",
"Bachelor's degree (for example, BA, BS)",
"Master's degree (for example, MA, MS)",
"Doctor or professional degree (for example, PhD, JD, MD, MBA)")),
Income = factor(Income,
levels = c("$5,001 - 15,000",
"$15,001 - 30,000",
"$30,001 - 60,000",
"$60,001 - 90,000",
"$90,001 - 150,000",
"Greater than $150,000",
"Prefer not to say")),
Parent = factor(Parent,
levels = c("No", "Yes")))
NAs introduced by coercion
# make useful datasets
# final dataset with all measured variables
d <- d4 %>% distinct()
# demographic information
# Demographic summary dataset.
# Bug fix: the youngest-child factor levels previously read "My Youngest
# child ..." (capital Y) and so never matched the response strings used in
# the case_when above them, silently coercing those responses to NA.
# bin_age: collapse an age in years into the reporting bins used below
# (returns NA for NA input, matching the original nested-ifelse behavior)
bin_age <- function(age_years) {
  ifelse(age_years < 1, "< 1 year",
         ifelse(age_years < 3, "1 - 3 years",
                ifelse(age_years < 5, "3 - 5 years",
                       ifelse(age_years < 10, "5 - 10 years",
                              ifelse(age_years < 18, "10 - 18 years",
                                     "> 18 years")))))
}
d_demo <- d %>%
  select(ResponseId, Duration,
         Age, starts_with("GenderSex"), starts_with("RaceEthnicity"),
         starts_with("FirstLang"),
         Education, Income, HouseholdSize,
         starts_with("MaritalStatus"),
         Parent, starts_with("Children"),
         Comments) %>%
  # collapse multi-race selections (comma-separated) into "Multiple"
  mutate(RaceEthnicity_collapse = ifelse(grepl(",([A-Za-z])", RaceEthnicity),
                                         "Multiple", RaceEthnicity)) %>%
  # oldest child: keep special responses verbatim, otherwise bin the
  # fill-in age (fillIn1 is in months, fillIn2 in years)
  mutate(ChildrenOldestAge_collapse = case_when(
    ChildrenOldestAge %in% c("My oldest child has not yet been born (I am/my partner is pregnant)", "My oldest child is deceased", "Prefer not to say") ~ ChildrenOldestAge,
    ChildrenOldestAge == "In months:" ~
      bin_age(as.numeric(ChildrenOldestAge_fillIn1)/12),
    ChildrenOldestAge == "In years:" ~
      bin_age(as.numeric(ChildrenOldestAge_fillIn2)),
    # NOTE(review): this is the literal string "NA", not NA — confirm intended
    TRUE ~ "NA")) %>%
  mutate(ChildrenOldestAge_collapse =
           factor(ChildrenOldestAge_collapse,
                  levels = c("My oldest child has not yet been born (I am/my partner is pregnant)",
                             "< 1 year",
                             "1 - 3 years",
                             "3 - 5 years",
                             "5 - 10 years",
                             "10 - 18 years",
                             "> 18 years",
                             "My oldest child is deceased",
                             "Prefer not to say"))) %>%
  # youngest child: same binning scheme
  mutate(ChildrenYoungestAge_collapse = case_when(
    ChildrenYoungestAge %in% c("My youngest child has not yet been born (I am/my partner is pregnant)", "My youngest child is deceased", "Prefer not to say") ~ ChildrenYoungestAge,
    ChildrenYoungestAge == "In months:" ~
      bin_age(as.numeric(ChildrenYoungestAge_fillIn1)/12),
    ChildrenYoungestAge == "In years:" ~
      bin_age(as.numeric(ChildrenYoungestAge_fillIn2)),
    TRUE ~ "NA")) %>%
  mutate(ChildrenYoungestAge_collapse =
           factor(ChildrenYoungestAge_collapse,
                  # levels now match the response text exactly (lowercase
                  # "youngest"); the capitalized versions never matched
                  levels = c("My youngest child has not yet been born (I am/my partner is pregnant)",
                             "< 1 year",
                             "1 - 3 years",
                             "3 - 5 years",
                             "5 - 10 years",
                             "10 - 18 years",
                             "> 18 years",
                             "My youngest child is deceased",
                             "Prefer not to say")))
# all assessments of ALL TARGETS, RepsonseId as rownames
# Wide matrix of all capacity ratings: one row per participant-target pair
# (rowname = "<ResponseId>_<target>"), one column per mental capacity.
d_all <- d %>%
  select(ResponseId, starts_with("target"), -contains("please_select")) %>%
  gather(question, response, -ResponseId) %>%
  mutate(
    target = gsub("_.*$", "", question),          # e.g. "target00mo"
    capacity = gsub("target..mo_", "", question), # capacity name alone
    subid = paste(ResponseId, target, sep = "_")
  ) %>%
  select(-ResponseId, -question, -target) %>%
  spread(capacity, response) %>%
  column_to_rownames("subid")
# all assessments of NEWBORNS, RepsonseId as rownames
# Ratings of NEWBORNS only (rows tagged target00mo)
d_00mo <- d_all %>%
  rownames_to_column("subid") %>%
  filter(grepl("target00mo", subid)) %>%
  # NOTE(review): this pattern expects a trailing "_" after the target tag,
  # but subid ENDS with "_target00mo", so the substitution never matches and
  # rownames keep the target suffix — confirm whether stripping was intended
  mutate(subid = gsub("target..mo_", "", subid)) %>%
  column_to_rownames("subid")
# all assessments of 9-MONTH-OLDS, RepsonseId as rownames
# Ratings of 9-MONTH-OLDS only (rows tagged target09mo)
d_09mo <- d_all %>%
  rownames_to_column("subid") %>%
  filter(grepl("target09mo", subid)) %>%
  # NOTE(review): no-op — subid ends with "_target09mo" (no trailing "_")
  mutate(subid = gsub("target..mo_", "", subid)) %>%
  column_to_rownames("subid")
# all assessments of 5-YEAR-OLDS, RepsonseId as rownames
# Ratings of 5-YEAR-OLDS only (rows tagged target60mo)
d_60mo <- d_all %>%
  rownames_to_column("subid") %>%
  filter(grepl("target60mo", subid)) %>%
  # NOTE(review): no-op — subid ends with "_target60mo" (no trailing "_")
  mutate(subid = gsub("target..mo_", "", subid)) %>%
  column_to_rownames("subid")
Our primary analysis is an exploratory factor analysis (EFA) collapsing across all 3 target characters (and treating an individual participant’s responses to each character as if they were independent data points) - see the preregistration for more details.
We planned to examine three factor retention protocols in order to determine how many factors to retain: Parallel analysis, minimizing BIC, and a set of preset criteria outlined in Weisman et al. (2017). Here we look at each solution in turn.
We planned to examine oblimin-rotated solutions (which allow factors to correlate), but you could examine other rotation options by selecting a different rotation type here.
# Factor rotation used for all EFAs below.
chosen_rot <- "oblimin" # preregistered: factors allowed to correlate
# chosen_rot <- "varimax" # orthogonal: factors forced not to correlate
# chosen_rot <- "none" # no rotation

# Parallel analysis on all targets combined (plot suppressed).
# FALSE instead of F (F can be reassigned); one statement per line.
reten_all_PA <- fa.parallel(d_all, plot = FALSE)
reten_all_PA
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Call: fa.parallel(x = d_all, plot = F)
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Eigen Values of
# EFA with the number of factors suggested by parallel analysis
reten_all_par <- reten_all_PA$nfact
efa_all_par <- fa(d_all,
                  nfactors = reten_all_par,
                  rotate = chosen_rot,
                  scores = "tenBerge",
                  impute = "median")
A loading greater than abs(1) was detected. Examine the loadings carefully.
# Factor-loading heatmap for the parallel-analysis solution (project helper)
heatmap_fun(efa_all_par) +
labs(title = paste0("Parallel Analysis (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
# Factor scores by target age for the parallel-analysis solution
scoresplot_fun(efa_all_par, target = "all (study 1)") +
labs(title = "Parallel Analysis") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
Ignoring unknown aesthetics: y
Here’s a multilevel linear regression on these factor scores, with random intercepts and slopes (for target and factor) by participant. Target is coded with linear and quadratic contrasts, and factor is effect-coded for comparison with the grand mean (with the last factor as the base, -1):
# Long-format factor scores (one row per participant x target x factor),
# then a multilevel model of score on target x factor.
efa_all_par_scores <- efa_all_par$scores[] %>%
  data.frame() %>%
  rownames_to_column("subid") %>%
  mutate(target = gsub("^.*_target", "target", subid),
         ResponseId = gsub("_target.*$", "", subid)) %>%
  select(-subid) %>%
  gather(factor, score, -target, -ResponseId) %>%
  mutate_at(vars(target, factor), funs(factor))

# orthogonal polynomial contrasts over the 3 ages; sum (effect) coding for factor
contrasts(efa_all_par_scores$target) <- contr.poly(3)
contrasts(efa_all_par_scores$factor) <- contr.sum(reten_all_par)

# random intercepts and slopes (target, factor) by participant
r_all_par <- lmer(score ~ target * factor + (target + factor | ResponseId),
                  data = efa_all_par_scores)
summary(r_all_par, corr = FALSE)
Linear mixed model fit by REML ['lmerMod']
Formula: score ~ target * factor + (target + factor | ResponseId)
Data: efa_all_par_scores
REML criterion at convergence: 5181
Scaled residuals:
Min 1Q Median 3Q Max
-4.2514 -0.5125 0.0304 0.5514 3.3609
Random effects:
Groups Name Variance Std.Dev. Corr
ResponseId (Intercept) 0.26818 0.5179
target.L 0.08540 0.2922 -0.44
target.Q 0.02459 0.1568 -0.90 0.35
factor1 0.11311 0.3363 -0.71 0.17 0.45
factor2 0.23800 0.4879 0.34 0.28 0.01 -0.71
factor3 0.02565 0.1602 -0.31 -0.22 0.43 0.53 -0.04
Residual 0.23513 0.4849
Number of obs: 2700, groups: ResponseId, 225
Fixed effects:
Estimate Std. Error t value
(Intercept) -2.187e-15 3.576e-02 0.000
target.L 8.496e-01 2.531e-02 33.560
target.Q 7.120e-02 1.925e-02 3.699
factor1 -4.474e-16 2.764e-02 0.000
factor2 2.641e-15 3.632e-02 0.000
factor3 7.033e-17 1.937e-02 0.000
target.L:factor1 5.125e-01 2.800e-02 18.308
target.Q:factor1 2.749e-01 2.800e-02 9.819
target.L:factor2 -4.397e-01 2.800e-02 -15.707
target.Q:factor2 -1.117e-01 2.800e-02 -3.990
target.L:factor3 3.288e-01 2.800e-02 11.745
target.Q:factor3 -2.922e-01 2.800e-02 -10.439
If we consider t-values > 2 to be “significant” and use the plot above to guide our interpretation, here’s what we conclude:
(Intercept): By definition, because we’re using factor scores, which are similar to z-scores, the intercept is ~0.]target.L: Overall, mental capacity attributions increase with target age.target.Q: This increase is generally non-linear.factor1, factor2, …: By definition, because we’re using factor scores, which are similar to z-scores, the differences in means across factors are ~0.]target.L:factor1: The linear increase is more dramatic (steeper slope) for MR1, compared to other factors.target.Q:factor1: The nonlinearity is more dramatic (curvier and/or concave instead of convex curve) for MR1, compared to other factors.target.L:factor2: The linear increase is less dramatic (shallower slope) for MR2, compared to other factors.target.Q:factor2: The nonlinearity is less dramatic (flatter curve) for MR2, compared to other factors.target.L:factor3: The linear increase is more dramatic (steeper slope) for MR3, compared to other factors.target.Q:factor3: The nonlinearity is less dramatic (flatter curve) for MR3, compared to other factors.itemsplot_fun(efa_all_par, target = "all") +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
|================================================ | 99% ~0 s remaining
Joining, by = c("capacity", "factor", "order")
reten_all_vss <- VSS(d_all, plot = F); reten_all_vss
Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm,
n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.96 with 1 factors
VSS complexity 2 achieves a maximimum of 0.99 with 2 factors
The Velicer MAP achieves a minimum of 0.01 with 6 factors
BIC achieves a minimum of -6032.72 with 6 factors
Sample Size adjusted BIC achieves a minimum of -1597.88 with 8 factors
Statistics by number of factors
vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC
1 0.96 0.00 0.0520 1710 15608 0.0e+00 53.5 0.96 0.112 4468 9898
2 0.60 0.99 0.0134 1651 7366 0.0e+00 11.9 0.99 0.073 -3390 1853
3 0.44 0.88 0.0076 1593 5252 0.0e+00 8.4 0.99 0.060 -5126 -68
4 0.42 0.80 0.0058 1536 4124 2.4e-235 6.5 0.99 0.051 -5883 -1006
5 0.41 0.79 0.0057 1480 3667 4.4e-186 5.9 1.00 0.048 -5974 -1275
6 0.42 0.80 0.0056 1425 3251 6.4e-144 5.5 1.00 0.045 -6033 -1508
7 0.42 0.80 0.0059 1371 3016 3.6e-125 5.2 1.00 0.044 -5915 -1562
8 0.42 0.80 0.0062 1318 2804 3.5e-109 4.9 1.00 0.042 -5783 -1598
complex eChisq SRMR eCRMS eBIC
1 1.0 27560 0.107 0.109 16420
2 1.5 3161 0.036 0.038 -7595
3 1.9 1548 0.025 0.027 -8830
4 2.2 803 0.018 0.020 -9203
5 2.3 623 0.016 0.018 -9018
6 2.4 511 0.015 0.016 -8772
7 2.4 448 0.014 0.016 -8483
8 2.4 390 0.013 0.015 -8196
# Number of factors minimizing BIC (rows of vss.stats are numbered 1..n by
# factor count). which.min() replaces the previous rownames/top_n() dance:
# it is direct, and returns a single index even if BIC were tied (top_n(-1)
# would return every tied row, yielding a vector and breaking fa()).
reten_all_bic <- which.min(reten_all_vss$vss.stats$BIC)
efa_all_bic <- fa(d_all, nfactors = reten_all_bic, rotate = chosen_rot,
                  scores = "tenBerge", impute = "median")
convergence not obtained in GPFoblq. 1000 iterations used. A loading greater than abs(1) was detected. Examine the loadings carefully.
# Factor-loading heatmap for the BIC-minimizing solution (project helper)
heatmap_fun(efa_all_bic) +
labs(title = paste0("Minimizing BIC (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
# Factor scores by target age for the BIC-minimizing solution
scoresplot_fun(efa_all_bic, target = "all (study 1)") +
labs(title = "Minimizing BIC") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
Ignoring unknown aesthetics: y
Here’s a multilevel linear regression on these factor scores, with random intercepts and slopes (for target and factor) by participant. Target is coded with linear and quadratic contrasts, and factor is effect-coded for comparison with the grand mean (with the last factor as the base, -1):
# Long-format factor scores for the BIC solution, then the same multilevel
# model as above (extra optimizer iterations to help convergence).
efa_all_bic_scores <- efa_all_bic$scores[] %>%
  data.frame() %>%
  rownames_to_column("subid") %>%
  mutate(target = gsub("^.*_target", "target", subid),
         ResponseId = gsub("_target.*$", "", subid)) %>%
  select(-subid) %>%
  gather(factor, score, -target, -ResponseId) %>%
  mutate_at(vars(target, factor), funs(factor))

# polynomial contrasts for target age; sum (effect) coding for factor
contrasts(efa_all_bic_scores$target) <- contr.poly(3)
contrasts(efa_all_bic_scores$factor) <- contr.sum(reten_all_bic)

r_all_bic <- lmer(score ~ target * factor + (target + factor | ResponseId),
                  data = efa_all_bic_scores,
                  control = lmerControl(optCtrl = list(maxfun = 100000)))
If we consider t-values > 2 to be “significant” and use the plot above to guide our interpretation, here’s what we conclude:
(Intercept): By definition, because we’re using factor scores, which are similar to z-scores, the intercept is ~0.]target.L: Overall, mental capacity attributions increase with target age.target.Q: This increase is not generally non-linear.factor1, factor2, …: By definition, because we’re using factor scores, which are similar to z-scores, the differences in means across factors are ~0.]target.L:factor1: The linear increase is more dramatic (steeper slope) for MR1, compared to other factors.target.Q:factor1: The nonlinearity is more dramatic (curvier and/or concave instead of convex curve) for MR1, compared to other factors.target.L:factor2: The linear increase is less dramatic (shallower slope) for MR2, compared to other factors.target.Q:factor2: The nonlinearity for MR2 is comparable to other factors.target.L:factor3: The linear increase is more dramatic (steeper slope) for MR3, compared to other factors.target.Q:factor3: The nonlinearity is less dramatic (flatter curve) for MR3, compared to other factors.target.L:factor4: The linear increase is less dramatic (shallower slope) for MR4, compared to other factors.target.Q:factor4: The nonlinearity is more dramatic (curvier and/or concanve instead of convex curve) for MR4, compared to other factors.target.L:factor5: The linear increase is more dramatic (steeper slope) for MR5, compared to other factors.target.Q:factor5: The nonlinearity is less dramatic (flatter curve) for MR5, compared to other factors.itemsplot_fun(efa_all_bic, target = "all") +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
|======================================== | 86% ~0 s remaining
|======================================== | 87% ~0 s remaining
|========================================== | 89% ~0 s remaining
|=========================================== | 92% ~0 s remaining
|============================================ | 95% ~0 s remaining
|============================================= | 97% ~0 s remaining
|============================================== | 99% ~0 s remaining
Joining, by = c("capacity", "factor", "order")
# Preset factor retention criteria (Weisman et al., 2017)
reten_all_k <- reten_fun(d_all, rot_type = chosen_rot)
print(paste0("Preset criteria suggest retaining ", reten_all_k, " factors"))
[1] "Preset criteria suggest retaining 2 factors"
# EFA with the preset-criteria factor count
efa_all_k <- fa(d_all,
                nfactors = reten_all_k,
                rotate = chosen_rot,
                scores = "tenBerge",
                impute = "median")
# Factor-loading heatmap for the preset-criteria solution (project helper)
heatmap_fun(efa_all_k) +
labs(title = paste0("Preset criteria (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
# Factor scores by target age for the preset-criteria solution
scoresplot_fun(efa_all_k, target = "all (study 1)") +
labs(title = "Preset factor retention criteria (Weisman et al., 2017)")
Ignoring unknown aesthetics: y
Here’s a multilevel linear regression on these factor scores, with random intercepts and slopes (for target and factor) by participant. Target is coded with linear and quadratic contrasts, and factor is effect-coded for comparison with the grand mean (with the last factor as the base, -1):
# Long-format factor scores for the preset-criteria solution, then the same
# multilevel model as for the other retention protocols.
efa_all_k_scores <- efa_all_k$scores[] %>%
  data.frame() %>%
  rownames_to_column("subid") %>%
  mutate(target = gsub("^.*_target", "target", subid),
         ResponseId = gsub("_target.*$", "", subid)) %>%
  select(-subid) %>%
  gather(factor, score, -target, -ResponseId) %>%
  mutate_at(vars(target, factor), funs(factor))

# polynomial contrasts for target age; sum (effect) coding for factor
contrasts(efa_all_k_scores$target) <- contr.poly(3)
contrasts(efa_all_k_scores$factor) <- contr.sum(reten_all_k)

r_all_k <- lmer(score ~ target * factor + (target + factor | ResponseId),
                data = efa_all_k_scores)
summary(r_all_k, corr = FALSE)
Linear mixed model fit by REML ['lmerMod']
Formula: score ~ target * factor + (target + factor | ResponseId)
Data: efa_all_k_scores
REML criterion at convergence: 2466.4
Scaled residuals:
Min 1Q Median 3Q Max
-3.7025 -0.4841 0.0034 0.5413 2.7814
Random effects:
Groups Name Variance Std.Dev. Corr
ResponseId (Intercept) 0.26690 0.5166
target.L 0.08731 0.2955 -0.48
target.Q 0.02408 0.1552 -0.93 0.26
factor1 0.09109 0.3018 -0.63 -0.09 0.48
Residual 0.18763 0.4332
Number of obs: 1350, groups: ResponseId, 225
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.800e-15 3.640e-02 0.000
target.L 1.050e+00 2.837e-02 36.991
target.Q 3.457e-02 2.289e-02 1.510
factor1 -1.095e-15 2.332e-02 0.000
target.L:factor1 3.327e-01 2.042e-02 16.292
target.Q:factor1 2.042e-01 2.042e-02 9.999
There are some problems with this model that I haven’t addressed - see warnings above. Ignoring them for now…
If we consider t-values > 2 to be “significant” and use the plot above to guide our interpretation, here’s what we conclude:
(Intercept): By definition, because we’re using factor scores, which are similar to z-scores, the intercept is ~0.]target.L: Overall, mental capacity attributions increase with target age.target.Q: This increase is generally non-linear.factor1: By definition, because we’re using factor scores, which are similar to z-scores, the differences in means across factors are ~0.]target.L:factor1: The linear increase is more dramatic (steeper slope) for MR1, compared to other factors.target.Q:factor1: The nonlinearity is more dramatic (curvier and/or concave instead of convex curve) for MR1, compared to other factors.itemsplot_fun(efa_all_k, target = "all") +
labs(title = "Preset factor retention criteria (Weisman et al., 2017)")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
What happens if we limit ourselves to assessments of newborns’ mental capacities?
reten_00mo_PA <- fa.parallel(d_00mo, plot = F); reten_00mo_PA
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Call: fa.parallel(x = d_00mo, plot = F)
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Eigen Values of
Original factors Simulated data Original components simulated data
1 20.21 1.26 20.83 2.20
2 6.83 1.09 7.59 2.07
3 1.76 1.01 2.47 1.99
4 1.25 0.94 1.87 1.93
# EFA on newborn ratings with the parallel-analysis factor count
reten_00mo_par <- reten_00mo_PA$nfact
efa_00mo_par <- fa(d_00mo,
                   nfactors = reten_00mo_par,
                   rotate = chosen_rot,
                   scores = "tenBerge",
                   impute = "median")
heatmap_fun(efa_00mo_par) +
labs(title = paste0("Parallel Analysis (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
itemsplot_fun(efa_00mo_par, target = "newborns") +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_00mo_vss <- VSS(d_00mo, plot = F); reten_00mo_vss
Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm,
n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.83 with 1 factors
VSS complexity 2 achieves a maximimum of 0.94 with 2 factors
The Velicer MAP achieves a minimum of 0.01 with 5 factors
BIC achieves a minimum of -5953.64 with 3 factors
Sample Size adjusted BIC achieves a minimum of -1158.37 with 8 factors
Statistics by number of factors
vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC
1 0.83 0.00 0.0329 1710 4978 1.1e-315 87 0.83 0.100 -4284 1135
2 0.67 0.94 0.0124 1651 3203 5.4e-102 29 0.94 0.072 -5739 -506
3 0.59 0.92 0.0094 1593 2674 5.4e-58 23 0.96 0.062 -5954 -905
4 0.44 0.78 0.0087 1536 2407 1.3e-41 20 0.96 0.058 -5912 -1044
5 0.43 0.79 0.0086 1480 2223 7.5e-33 18 0.96 0.055 -5793 -1103
6 0.43 0.78 0.0087 1425 2080 2.0e-27 17 0.97 0.054 -5638 -1122
7 0.43 0.77 0.0089 1371 1950 4.9e-23 16 0.97 0.052 -5475 -1130
8 0.43 0.76 0.0092 1318 1803 9.5e-18 15 0.97 0.050 -5335 -1158
complex eChisq SRMR eCRMS eBIC
1 1.0 13387 0.130 0.132 4125
2 1.4 2498 0.056 0.058 -6444
3 1.6 1609 0.045 0.047 -7019
4 1.9 1207 0.039 0.042 -7112
5 2.0 1030 0.036 0.039 -6985
6 2.1 895 0.034 0.037 -6823
7 2.3 785 0.031 0.036 -6640
8 2.3 697 0.030 0.034 -6441
# Number of factors minimizing BIC for newborn ratings. which.min() is
# direct and single-valued (top_n(-1) returns all rows under BIC ties).
reten_00mo_bic <- which.min(reten_00mo_vss$vss.stats$BIC)
efa_00mo_bic <- fa(d_00mo, nfactors = reten_00mo_bic, rotate = chosen_rot,
                   scores = "tenBerge", impute = "median")
heatmap_fun(efa_00mo_bic) +
labs(title = paste0("Minimizing BIC (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
itemsplot_fun(efa_00mo_bic, target = "newborns") +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
# Preset retention criteria (Weisman et al., 2017), newborn ratings
reten_00mo_k <- reten_fun(d_00mo, rot_type = chosen_rot)
print(paste0("Preset criteria suggest retaining ", reten_00mo_k, " factors"))
[1] "Preset criteria suggest retaining 2 factors"
# EFA on newborn ratings with the preset-criteria factor count
efa_00mo_k <- fa(d_00mo,
                 nfactors = reten_00mo_k,
                 rotate = chosen_rot,
                 scores = "tenBerge",
                 impute = "median")
heatmap_fun(efa_00mo_k) +
labs(title = paste0("Preset criteria (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
itemsplot_fun(efa_00mo_k, target = "newborns") +
labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
What happens if we limit ourselves to assessments of 9-month-olds’ mental capacities?
reten_09mo_PA <- fa.parallel(d_09mo, plot = F); reten_09mo_PA
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Call: fa.parallel(x = d_09mo, plot = F)
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Eigen Values of
Original factors Simulated data Original components simulated data
1 26.33 1.24 26.86 2.18
2 7.41 1.09 8.07 2.07
3 1.46 1.01 2.04 1.99
4 1.14 0.93 1.67 1.91
# EFA on 9-month-old ratings with the parallel-analysis factor count
reten_09mo_par <- reten_09mo_PA$nfact
efa_09mo_par <- fa(d_09mo,
                   nfactors = reten_09mo_par,
                   rotate = chosen_rot,
                   scores = "tenBerge",
                   impute = "median")
heatmap_fun(efa_09mo_par) +
labs(title = paste0("Parallel analysis (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
itemsplot_fun(efa_09mo_par, target = "9-month-olds") +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_09mo_vss <- VSS(d_09mo, plot = F); reten_09mo_vss
Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm,
n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.9 with 1 factors
VSS complexity 2 achieves a maximimum of 0.98 with 2 factors
The Velicer MAP achieves a minimum of 0.01 with 5 factors
BIC achieves a minimum of -5884.26 with 4 factors
Sample Size adjusted BIC achieves a minimum of -1149.05 with 8 factors
Statistics by number of factors
vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC
1 0.90 0.00 0.0486 1710 5771 0.0e+00 84.4 0.90 0.110 -3490 1929
2 0.67 0.98 0.0138 1651 3394 9.8e-123 19.3 0.98 0.076 -5548 -316
3 0.47 0.87 0.0106 1593 2826 7.0e-72 15.2 0.98 0.066 -5802 -754
4 0.43 0.76 0.0090 1536 2435 7.3e-44 12.6 0.98 0.059 -5884 -1016
5 0.42 0.75 0.0088 1480 2241 3.7e-34 11.2 0.99 0.056 -5775 -1085
6 0.42 0.75 0.0088 1425 2082 1.5e-27 10.3 0.99 0.054 -5636 -1120
7 0.42 0.74 0.0089 1371 1939 2.5e-22 9.5 0.99 0.052 -5486 -1141
8 0.41 0.74 0.0092 1318 1812 2.6e-18 8.8 0.99 0.050 -5326 -1149
complex eChisq SRMR eCRMS eBIC
1 1.0 14525 0.135 0.137 5264
2 1.4 1763 0.047 0.049 -7179
3 1.8 1147 0.038 0.040 -7481
4 2.1 785 0.031 0.034 -7534
5 2.2 642 0.028 0.031 -7374
6 2.3 546 0.026 0.029 -7172
7 2.4 474 0.024 0.028 -6951
8 2.4 417 0.023 0.027 -6721
# Number of factors minimizing BIC for 9-month-old ratings. which.min() is
# direct and single-valued (top_n(-1) returns all rows under BIC ties).
reten_09mo_bic <- which.min(reten_09mo_vss$vss.stats$BIC)
efa_09mo_bic <- fa(d_09mo, nfactors = reten_09mo_bic, rotate = chosen_rot,
                   scores = "tenBerge", impute = "median")
heatmap_fun(efa_09mo_bic) +
labs(title = paste0("Minimizing BIC (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
itemsplot_fun(efa_09mo_bic, target = "9-month-olds") +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
# Preset retention criteria (Weisman et al., 2017), 9-month-old ratings
reten_09mo_k <- reten_fun(d_09mo, rot_type = chosen_rot)
print(paste0("Preset criteria suggest retaining ", reten_09mo_k, " factors"))
[1] "Preset criteria suggest retaining 2 factors"
# EFA on 9-month-old ratings with the preset-criteria factor count
efa_09mo_k <- fa(d_09mo,
                 nfactors = reten_09mo_k,
                 rotate = chosen_rot,
                 scores = "tenBerge",
                 impute = "median")
heatmap_fun(efa_09mo_k) +
labs(title = paste0("Preset criteria (rotation: ", chosen_rot, ")"),
subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
# Item-level view of the preset-criteria solution for 9-month-olds.
p_items_09mo_k <- itemsplot_fun(efa_09mo_k, target = "9-month-olds") +
  labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)")
p_items_09mo_k
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
What happens if we limit ourselves to assessments of 5-year-olds’ mental capacities?
reten_60mo_PA <- fa.parallel(d_60mo, plot = F); reten_60mo_PA
Parallel analysis suggests that the number of factors = 3 and the number of components = 2
Call: fa.parallel(x = d_60mo, plot = F)
Parallel analysis suggests that the number of factors = 3 and the number of components = 2
Eigen Values of
Original factors Simulated data Original components simulated data
1 32.53 1.29 32.96 2.23
2 6.13 1.11 6.67 2.09
3 1.27 1.01 1.76 1.99
# Retain the number of factors suggested by parallel analysis, then run
# the EFA on 5-year-old ratings with that retention.
reten_60mo_par <- reten_60mo_PA$nfact
efa_60mo_par <- fa(
  d_60mo,
  nfactors = reten_60mo_par,
  rotate = chosen_rot,
  scores = "tenBerge",
  impute = "median"
)
A loading greater than abs(1) was detected. Examine the loadings carefully.
# Heatmap of the parallel-analysis solution for 5-year-olds.
title_60mo_par <- paste0("Parallel analysis (rotation: ", chosen_rot, ")")
heatmap_fun(efa_60mo_par) +
  labs(title = title_60mo_par,
       subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
# Item-level view of the parallel-analysis solution for 5-year-olds.
p_items_60mo_par <- itemsplot_fun(efa_60mo_par, target = "5-year-olds") +
  labs(title = "Parallel Analysis")
p_items_60mo_par
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_60mo_vss <- VSS(d_60mo, plot = F); reten_60mo_vss
Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm,
n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.95 with 1 factors
VSS complexity 2 achieves a maximimum of 0.99 with 2 factors
The Velicer MAP achieves a minimum of 0.01 with 5 factors
BIC achieves a minimum of -4460.84 with 4 factors
Sample Size adjusted BIC achieves a minimum of 61.81 with 8 factors
Statistics by number of factors
vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC
1 0.95 0.00 0.053 1710 7316 0.0e+00 58.4 0.95 0.129 -1945 3474
2 0.62 0.99 0.016 1651 4777 1.1e-300 14.0 0.99 0.099 -4165 1067
3 0.49 0.84 0.013 1593 4219 4.5e-236 10.9 0.99 0.093 -4409 639
4 0.52 0.79 0.013 1536 3858 8.0e-200 9.4 0.99 0.090 -4461 407
5 0.49 0.77 0.012 1480 3594 1.3e-176 8.2 0.99 0.088 -4422 269
6 0.46 0.78 0.013 1425 3391 2.8e-161 7.5 0.99 0.086 -4327 189
7 0.52 0.77 0.013 1371 3219 8.2e-150 6.8 0.99 0.086 -4207 138
8 0.50 0.76 0.013 1318 3023 2.5e-135 6.2 0.99 0.084 -4115 62
complex eChisq SRMR eCRMS eBIC
1 1.0 10123 0.113 0.115 862
2 1.5 1386 0.042 0.043 -7556
3 1.9 938 0.034 0.036 -7690
4 2.1 728 0.030 0.032 -7591
5 2.2 597 0.027 0.030 -7419
6 2.2 522 0.026 0.029 -7196
7 2.2 455 0.024 0.027 -6971
8 2.3 394 0.022 0.026 -6744
# Retain the number of factors that minimizes BIC in the VSS results
# (5-year-old targets). Row names of vss.stats are the candidate counts.
# Fix: the original wrapped the result in data.frame(), which (with
# stringsAsFactors = TRUE, the pre-R-4.0 default) coerces the character
# "nfactors" column to a factor, so as.numeric() can return level codes
# rather than the actual factor counts. Pulling the column directly avoids
# this; filter(BIC == min(BIC)) replaces the superseded top_n(-1, BIC)
# (ties are kept by both).
reten_60mo_bic <- reten_60mo_vss$vss.stats %>%
  rownames_to_column("nfactors") %>%
  filter(BIC == min(BIC)) %>%
  pull(nfactors) %>%
  as.numeric()
# EFA on 5-year-old ratings with the BIC-selected number of factors,
# followed by a heatmap of the rotated loadings.
efa_60mo_bic <- fa(
  d_60mo,
  nfactors = reten_60mo_bic,
  rotate = chosen_rot,
  scores = "tenBerge",
  impute = "median"
)
title_60mo_bic <- paste0("Minimizing BIC (rotation: ", chosen_rot, ")")
heatmap_fun(efa_60mo_bic) +
  labs(title = title_60mo_bic,
       subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
# Item-level view of the BIC-selected solution for 5-year-olds.
p_items_60mo_bic <- itemsplot_fun(efa_60mo_bic, target = "5-year-olds") +
  labs(title = "Minimizing BIC")
p_items_60mo_bic
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
# Number of factors suggested by our preset retention criteria (5-year-olds).
reten_60mo_k <- reten_fun(d_60mo, rot_type = chosen_rot)
msg_60mo_k <- paste("Preset criteria suggest retaining", reten_60mo_k, "factors")
print(msg_60mo_k)
[1] "Preset criteria suggest retaining 2 factors"
# EFA on 5-year-old ratings with the preset-criteria number of factors,
# followed by a heatmap of the rotated loadings.
efa_60mo_k <- fa(
  d_60mo,
  nfactors = reten_60mo_k,
  rotate = chosen_rot,
  scores = "tenBerge",
  impute = "median"
)
title_60mo_k <- paste0("Preset criteria (rotation: ", chosen_rot, ")")
heatmap_fun(efa_60mo_k) +
  labs(title = title_60mo_k,
       subtitle = "'% var.' indicates the amount of shared variance explained (total = 100%)")
Joining, by = "capacity"
Joining, by = "factor"
# Item-level view of the preset-criteria solution for 5-year-olds.
p_items_60mo_k <- itemsplot_fun(efa_60mo_k, target = "5-year-olds") +
  labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)")
p_items_60mo_k
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
# Histogram of study duration, converted from seconds to minutes;
# blue dashed line marks the median.
d_demo %>%
  mutate(duration_min = Duration / 60) %>%
  ggplot(aes(x = duration_min)) +
  geom_histogram(binwidth = 2) +
  geom_vline(xintercept = median(d_demo$Duration / 60), color = "blue", lty = 2) +
  scale_x_continuous(breaks = seq(0, 10000, 4)) +
  labs(title = "Duration of study (according to Qualtrics)",
       subtitle = "Blue dotted line marks median",
       x = "Duration (in minutes)",
       y = "Number of participants")
# Histogram of self-reported participant age; blue dashed line marks the median.
# Fix: corrected "Particpiant" typo in the plot title.
ggplot(d_demo, aes(x = Age)) +
  geom_histogram(binwidth = 2) +
  geom_vline(xintercept = median(d_demo$Age), color = "blue", lty = 2) +
  scale_x_continuous(breaks = seq(0, 10000, 4)) +
  labs(title = "Participant age (self-reported)",
       subtitle = "Blue dotted line marks median",
       x = "Age (in years)",
       y = "Number of participants")
# Bar chart of self-reported gender/sex.
# Fix: corrected "Particpiant" typo in the plot title.
ggplot(d_demo, aes(x = GenderSex)) +
  geom_bar() +
  labs(title = "Participant gender/sex (self-reported)",
       x = "Gender/sex",
       y = "Number of participants")
# Bar chart of self-reported race/ethnicity; long labels are wrapped onto
# new lines (~30 chars each) and tilted for readability.
# Fix: corrected "Particpiant" typo in the plot title.
ggplot(d_demo, aes(x = gsub('(.{1,30})(\\s|$)', '\\1\n', RaceEthnicity_collapse))) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Participant race/ethnicity (self-reported)",
       x = "Race/ethnicity",
       y = "Number of participants")
# Bar chart of self-reported first language.
# Fix: corrected "Particpiant" typo in the plot title.
ggplot(d_demo, aes(x = FirstLang)) +
  geom_bar() +
  labs(title = "Participant first language (self-reported)",
       x = "Language",
       y = "Number of participants")
# Bar chart of educational attainment, with long level labels wrapped
# (~30 chars per line) and tilted for readability.
# Fix: corrected "Particpiant" typo in the plot title.
# NOTE(review): levels come from `d$Education` while the data plotted are
# `d_demo` -- presumably the levels match, but confirm `d` vs `d_demo`.
ggplot(d_demo, aes(x = factor(Education,
                              levels = levels(d$Education),
                              labels = gsub('(.{1,30})(\\s|$)', '\\1\n',
                                            levels(d$Education))))) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Participant educational attainment (self-reported)",
       x = "Highest level of education completed",
       y = "Number of participants")
# Bar chart of self-reported annual household income.
# Fix: corrected "Particpiant" typo in the plot title.
ggplot(d_demo, aes(x = Income)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Participant household income (self-reported)",
       x = "Annual household income",
       y = "Number of participants")
# Histogram of self-reported household size; blue dashed line marks the median.
# Fix: corrected "Particpiant" typo in the plot title.
ggplot(d_demo, aes(x = HouseholdSize)) +
  geom_histogram(binwidth = 1) +
  geom_vline(xintercept = median(d_demo$HouseholdSize), color = "blue", lty = 2) +
  scale_x_continuous(breaks = seq(0, 10000, 1)) +
  labs(title = "Participant household size (self-reported)",
       subtitle = "Blue dotted line marks median",
       x = "Number of people in household (adults and children)",
       y = "Number of participants")
# Bar chart of self-reported marital status.
# Fix: corrected "Particpiant" typo in the plot title.
ggplot(d_demo, aes(x = MaritalStatus)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Participant marital status (self-reported)",
       x = "Marital status",
       y = "Number of participants")
# Bar chart of self-reported parent status; NA = "Prefer not to say".
# Fix: corrected "Particpiant" typo in the plot title.
ggplot(d_demo, aes(x = Parent)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Participant parent status (self-reported)",
       subtitle = "'NA' indicates response of 'Prefer not to say'",
       x = "Parent status",
       y = "Number of participants")
# Histogram of number of children among parents; blue dashed line marks
# the median. Fixes: use TRUE rather than the reassignable shorthand T;
# compute the parent subset once so the histogram and the median line are
# guaranteed to use the same data (original mixed dplyr filter with base
# `[` subsetting of the same condition).
d_parents <- d_demo %>% filter(Parent == "Yes")
ggplot(d_parents, aes(x = ChildrenNumber)) +
  geom_histogram(binwidth = 1) +
  geom_vline(xintercept = median(d_parents$ChildrenNumber, na.rm = TRUE),
             color = "blue", lty = 2) +
  scale_x_continuous(breaks = seq(0, 10000, 1)) +
  labs(title = "Number of children among parents (self-reported)",
       subtitle = "Blue dotted line marks median",
       x = "Number of children (among parents)",
       y = "Number of participants")
# Bar chart: age of oldest child, among parents only.
# Wrap long factor-level labels onto new lines (~30 chars) for readability.
oldest_lvls <- levels(d_demo$ChildrenOldestAge_collapse)
oldest_labs <- gsub('(.{1,30})(\\s|$)', '\\1\n', oldest_lvls)
d_demo %>%
  filter(Parent == "Yes") %>%
  ggplot(aes(x = factor(ChildrenOldestAge_collapse,
                        levels = oldest_lvls,
                        labels = oldest_labs))) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Age of oldest child among parents (self-reported)",
       x = "Age of child in years (among parents)",
       y = "Number of participants")
# Bar chart: age of youngest child, among parents only.
# Wrap long factor-level labels onto new lines (~30 chars) for readability.
youngest_lvls <- levels(d_demo$ChildrenYoungestAge_collapse)
youngest_labs <- gsub('(.{1,30})(\\s|$)', '\\1\n', youngest_lvls)
d_demo %>%
  filter(Parent == "Yes") %>%
  ggplot(aes(x = factor(ChildrenYoungestAge_collapse,
                        levels = youngest_lvls,
                        labels = youngest_labs))) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Age of youngest child among parents (self-reported)",
       x = "Age of child in years (among parents)",
       y = "Number of participants")
Finally, I’ll export the 4-factor EFA results to a saved file that I can draw on in our Study 2 analyses.
saveRDS(efa_all_par, file = "./s1_efa.rds")
Error in saveRDS(efa_all_par, file = "./s1_efa.rds") :
object 'efa_all_par' not found